From 14fe619b89b42b4c9974507a85cf7d98439e3777 Mon Sep 17 00:00:00 2001
From: robertl <robertl@f51c46e8-681c-474f-0cfe-069cfd0219fb>
Date: Tue, 6 Apr 2004 20:58:32 +0000
Subject: [PATCH] Add HTML and Text formats.   From Scott at brynen.com.
 util.c: tweak html honkers.

---
 gpsbabel/Makefile |   2 +-
 gpsbabel/README   |  13 +++
 gpsbabel/defs.h   |   2 +
 gpsbabel/html.c   | 164 +++++++++++++++++++++++++++++++++++++
 gpsbabel/text.c   | 133 ++++++++++++++++++++++++++++++
 gpsbabel/util.c   | 205 +++++++++++++++++++++++++++++++++++-----------
 gpsbabel/vecs.c   |  14 ++++
 7 files changed, 485 insertions(+), 48 deletions(-)
 create mode 100644 gpsbabel/html.c
 create mode 100644 gpsbabel/text.c

diff --git a/gpsbabel/Makefile b/gpsbabel/Makefile
index c1d64145c..97976c4ce 100644
--- a/gpsbabel/Makefile
+++ b/gpsbabel/Makefile
@@ -19,7 +19,7 @@ FMTS=magproto.o gpx.o geo.o mapsend.o mapsource.o \
 	psp.o holux.o garmin.o tmpro.o tpg.o \
 	xcsv.o gcdb.o tiger.o internal_styles.o easygps.o quovadis.o \
 	gpilots.o saroute.o navicache.o psitrex.o geoniche.o delgpl.o \
-	ozi.o nmea.o
+	ozi.o nmea.o text.o html.o
 
 FILTERS=position.o duplicate.o arcdist.o polygon.o smplrout.o reverse_route.o
 
diff --git a/gpsbabel/README b/gpsbabel/README
index c9c48e0a4..3915fd4d1 100644
--- a/gpsbabel/README
+++ b/gpsbabel/README
@@ -497,6 +497,19 @@ THE FORMATS
 	http://www.kolumbus.fi/eino.uikkanen/geoconvgb/index.htm
 	http://www.commlinx.com.au/GPS_recorder.htm
 
+    TEXT
+
+        This output-only format is a simple human-readable listing of the
+        waypoints, handy for printing the contents of any waypoint file.
+
+    HTML
+
+        HTML output generates a single HTML file of all of the waypoints in
+        the input file.  It supports a number of Geocaching GPX extensions
+        and filters potentially harmful HTML out of the input file while
+        preserving almost all of the source HTML formatting.
+
+
 DATA FILTERS
 
 	GPSBabel supports data filtering.  Data filters are invoked from
diff --git a/gpsbabel/defs.h b/gpsbabel/defs.h
index 20fd14bef..bed556242 100644
--- a/gpsbabel/defs.h
+++ b/gpsbabel/defs.h
@@ -413,7 +413,9 @@ signed int get_tz_offset(void);
 signed int month_lookup(const char *m);
 const char *get_cache_icon(const waypoint *waypointp);
 char * xml_entitize(const char * str);
+char * html_entitize(const char * str);
 char * strip_html(const utf_string*);
+char * strip_nastyhtml(const char * in);
 char * str_utf8_to_cp1252( const char * str );
 char * str_utf8_to_ascii( const char * str );
 
diff --git a/gpsbabel/html.c b/gpsbabel/html.c
new file mode 100644
index 000000000..8a128bb5c
--- /dev/null
+++ b/gpsbabel/html.c
@@ -0,0 +1,164 @@
+/*
+    Output-only format that writes waypoints as a human-readable HTML page.
+
+    Copyright (C) 2004 Scott Brynen, scott (at) brynen.com
+    Copyright (C) 2002 Robert Lipe, robertlipe@usa.net
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111 USA
+*/
+
+
+#include "defs.h"
+#include "jeeps/gpsmath.h"
+#include <ctype.h>
+
+static FILE *file_out;		/* output stream, opened in wr_init() */
+static void *mkshort_handle;	/* handle passed to mkshort() and setshort_length() */
+
+static char *stylesheet = NULL;	/* target of the "stylesheet" option below */
+
+#define MYNAME "HTML"		/* module name handed to xfopen() */
+
+static
+arglist_t html_args[] = {	/* options accepted by this format */
+	{ "stylesheet", &stylesheet, 
+		"Path to HTML style sheet", ARGTYPE_STRING },
+	{0, 0, 0, 0}		/* all-zero entry, presumably the list terminator */
+};
+
+
+
+static void
+wr_init(const char *fname)	/* open fname for writing and create the mkshort handle */
+{
+	file_out = xfopen(fname, "w", MYNAME);
+	mkshort_handle = mkshort_new_handle();
+}
+
+static void
+wr_deinit(void)	/* close the output file, then delete the mkshort handle */
+{
+	fclose(file_out);	/* NOTE(review): return value is not checked */
+	mkshort_del_handle(mkshort_handle);
+}
+
+static void
+html_disp(const waypoint *wpt)
+{
+	/* Write one waypoint: heading (name, lat/lon as degrees and decimal
+	 * minutes, UTM, optional altitude), then geocache texts or notes. */
+	int latint, lonint;
+	long utmz;
+	double utme, utmn;
+	char utmzc;
+	char *tmp;	/* heap string from html_entitize()/strip_nastyhtml() */
+
+	lonint = abs(wpt->longitude);
+	latint = abs(wpt->latitude);
+	GPS_Math_WGS84_To_UTM_EN(wpt->latitude, wpt->longitude, 
+		&utme, &utmn, &utmz, &utmzc);
+	fprintf(file_out, "<hr>\n");
+	fprintf(file_out, "<a name=\"%s\"></a><table width=\"100%%\"><tr><td>\n", wpt->shortname);
+	fprintf(file_out, "<h3 class=\"waypoint\">%s - %c%d&deg;%06.3f %c%d&deg;%06.3f (%ld%c %6.0f %7.0f)",
+		(global_opts.synthesize_shortnames) ? mkshort(mkshort_handle, wpt->description) : wpt->shortname,
+		wpt->latitude < 0 ? 'S' : 'N',  abs(latint), 60.0 * (fabs(wpt->latitude) - latint), 
+		wpt->longitude < 0 ? 'W' : 'E', abs(lonint), 60.0 * (fabs(wpt->longitude) - lonint),
+		utmz, utmzc, utme, utmn);
+	if (wpt->altitude != unknown_alt) 
+		fprintf (file_out, " alt: %1.1f", wpt->altitude);
+	fprintf (file_out, "<br>\n");
+
+	/* The description is shown only when it differs from the short name. */
+	if (wpt->description && strcmp(wpt->description, wpt->shortname)) {
+		tmp = html_entitize(wpt->description);
+		if (wpt->url)
+			fprintf(file_out, "<a href=\"%s\">%s</a>", wpt->url, tmp);
+		else
+			fprintf(file_out, "%s", tmp);
+		xfree(tmp);
+	}
+	fprintf(file_out, "</h3>\n");	/* close the heading on every path */
+
+	if (wpt->gc_data.terr) {
+		if (wpt->gc_data.desc_short.utfstring) {
+			tmp = strip_nastyhtml(wpt->gc_data.desc_short.utfstring);
+			fprintf (file_out, "<p class=\"descshort\">%s</p>\n", tmp);
+			xfree(tmp);	/* strip_nastyhtml() returns a private copy */
+		}
+		if (wpt->gc_data.desc_long.utfstring) {
+			tmp = strip_nastyhtml(wpt->gc_data.desc_long.utfstring);
+			fprintf (file_out, "<p class=\"desclong\">%s</p>\n", tmp);
+			xfree(tmp);
+		}
+		if (wpt->gc_data.hint)
+			fprintf (file_out, "<p class=\"hint\"><strong>Hint:</strong> %s</p>\n", wpt->gc_data.hint);
+	}
+	else if (wpt->notes && (!wpt->description || strcmp(wpt->notes, wpt->description))) {
+		/* Notes are plain text, so escape them like the description. */
+		tmp = html_entitize(wpt->notes);
+		fprintf (file_out, "<p class=\"notes\">%s</p>\n", tmp);
+		xfree(tmp);
+	}
+	fprintf(file_out, "</td></tr></table>\n");
+}
+
+static void
+html_index(const waypoint *wpt)	/* one index link pointing at the waypoint's anchor */
+{
+	char *name_esc = html_entitize(wpt->shortname);
+	char *desc_esc = html_entitize(wpt->description);
+
+	fprintf(file_out, "<a href=\"#%s\">%s - %s</a><br>\n",
+		name_esc, name_esc, desc_esc);
+	xfree(desc_esc);
+	xfree(name_esc);
+}
+
+static void
+data_write(void)	/* page prologue, index of links, one section per waypoint */
+{
+	setshort_length(mkshort_handle, 6);
+
+	/* Constant markup needs no formatting, so plain fputs() is enough. */
+	fputs("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n", file_out);
+	fputs("<html>\n", file_out);
+	fputs("<head>\n", file_out);
+	fputs(" <title>GPSBabel HTML Output</title>\n", file_out);
+	fputs(" <meta name=\"Generator\" content=\"GPSBabel\">\n", file_out);
+	if (stylesheet != NULL) {
+		fprintf(file_out, " <link rel=\"stylesheet\" type=\"text/css\" href=\"%s\">\n", stylesheet);
+	}
+	fputs("</head>\n", file_out);
+	fputs("<body>\n", file_out);
+
+	fputs("<p class=\"index\">\n", file_out);
+	waypt_disp_all(html_index);
+	fputs("</p>\n", file_out);
+
+	waypt_disp_all(html_disp);
+	fputs("</body>", file_out);
+	fputs("</html>", file_out);
+}
+
+
+ff_vecs_t html_vecs = {	/* format table referenced from vec_list in vecs.c */
+	ff_type_file,
+	NULL,		/* NULL slots: presumably the unused reader hooks - confirm in defs.h */
+	wr_init,
+	NULL,
+	wr_deinit,
+	NULL,
+	data_write,
+	html_args
+};
diff --git a/gpsbabel/text.c b/gpsbabel/text.c
new file mode 100644
index 000000000..cebd5a835
--- /dev/null
+++ b/gpsbabel/text.c
@@ -0,0 +1,133 @@
+/*
+    Output-only format that writes waypoints as human-readable plain text.
+
+    Copyright (C) 2004 Scott Brynen, scott (at) brynen.com
+    Copyright (C) 2002 Robert Lipe, robertlipe@usa.net
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111 USA
+*/
+
+
+#include "defs.h"
+#include "jeeps/gpsmath.h"
+#include <ctype.h>
+
+static FILE *file_out;		/* output stream, opened in wr_init() */
+static void *mkshort_handle;	/* handle passed to mkshort() and setshort_length() */
+
+static char *suppresssep = NULL;	/* target of the "nosep" option; tested only for NULL */
+
+#define MYNAME "TEXT"		/* module name handed to xfopen() */
+
+static
+arglist_t text_args[] = {	/* options accepted by this format */
+	{ "nosep", &suppresssep, 
+		"Suppress separator lines between waypoints", ARGTYPE_BOOL },
+	{0, 0, 0, 0}		/* all-zero entry, presumably the list terminator */
+};
+
+
+
+static void
+wr_init(const char *fname)	/* open fname for writing and create the mkshort handle */
+{
+	file_out = xfopen(fname, "w", MYNAME);
+	mkshort_handle = mkshort_new_handle();
+}
+
+static void
+wr_deinit(void)	/* close the output file, then delete the mkshort handle */
+{
+	fclose(file_out);	/* NOTE(review): return value is not checked */
+	mkshort_del_handle(mkshort_handle);
+}
+
+static void
+text_disp(const waypoint *wpt)
+{
+	/* Write one waypoint: a position line (name, lat/lon as degrees and
+	 * decimal minutes, UTM, optional altitude), then its texts. */
+	int latint, lonint;
+	long utmz;
+	double utme, utmn;
+	char utmzc;
+	char *stripped_html;
+
+	lonint = abs(wpt->longitude);
+	latint = abs(wpt->latitude);
+	GPS_Math_WGS84_To_UTM_EN(wpt->latitude, wpt->longitude, 
+		&utme, &utmn, &utmz, &utmzc);
+
+	fprintf(file_out, "%-16s  %c%d %06.3f  %c%d %06.3f  (%ld%c %6.0f %7.0f)",
+		(global_opts.synthesize_shortnames) ? mkshort(mkshort_handle, wpt->description) : wpt->shortname,
+		wpt->latitude < 0 ? 'S' : 'N',  abs(latint), 60.0 * (fabs(wpt->latitude) - latint), 
+		wpt->longitude < 0 ? 'W' : 'E', abs(lonint), 60.0 * (fabs(wpt->longitude) - lonint),
+		utmz, utmzc, utme, utmn);
+	if (wpt->altitude != unknown_alt) 
+		fprintf (file_out, "  alt: %1.1f", wpt->altitude);
+	fprintf (file_out, "\n");
+	if (wpt->description && strcmp(wpt->description, wpt->shortname)) {
+		fprintf(file_out, "%s\n", wpt->description);
+	}
+	if (wpt->gc_data.terr) {
+		/*
+		 * Free the result only when strip_html() handed back a copy
+		 * rather than the waypoint's own buffer.
+		 */
+		if (wpt->gc_data.desc_short.utfstring) {
+			stripped_html = strip_html(&wpt->gc_data.desc_short);
+			fprintf (file_out, "\n%s\n", stripped_html);
+			if (stripped_html != wpt->gc_data.desc_short.utfstring)
+				xfree(stripped_html);
+		}
+		if (wpt->gc_data.desc_long.utfstring) {
+			stripped_html = strip_html(&wpt->gc_data.desc_long);
+			fprintf (file_out, "\n%s\n", stripped_html);
+			if (stripped_html != wpt->gc_data.desc_long.utfstring)
+				xfree(stripped_html);
+		}
+		if (wpt->gc_data.hint) {
+			fprintf (file_out, "\nHint: %s\n", wpt->gc_data.hint);
+		}
+	}
+	else if (wpt->notes && (!wpt->description || strcmp(wpt->notes, wpt->description))) {
+		fprintf (file_out, "%s\n", wpt->notes);
+	}
+	if (! suppresssep) 
+		fprintf(file_out, "-----------------------------------------------------------------------------\n");
+	else
+		fprintf(file_out, "\n");
+}
+
+static void
+data_write(void)	/* leading separator unless "nosep" was given, then every waypoint */
+{
+	if (suppresssep == NULL)
+		fputs("-----------------------------------------------------------------------------\n", file_out);
+	setshort_length(mkshort_handle, 6);
+	waypt_disp_all(text_disp);
+}
+
+
+ff_vecs_t text_vecs = {	/* format table referenced from vec_list in vecs.c */
+	ff_type_file,
+	NULL,		/* NULL slots: presumably the unused reader hooks - confirm in defs.h */
+	wr_init,
+	NULL,
+	wr_deinit,
+	NULL,
+	data_write,
+	text_args
+};
diff --git a/gpsbabel/util.c b/gpsbabel/util.c
index dfad3fb36..e16b53821 100644
--- a/gpsbabel/util.c
+++ b/gpsbabel/util.c
@@ -784,6 +784,61 @@ char * str_utf8_to_ascii( const char * str )
 	return result;
 }
 
+/* 
+ * Defang HTML that could affect the rest of the generated page.  Returns a
+ * newly allocated string of the same length as the input, which the caller
+ * releases with xfree().  Matching is case-insensitive:
+ * <body, </body, </html - tag name overwritten with dashes
+ * <style ... </style>   - turned into an HTML comment
+ * <image                - rewritten as <img
+ */
+char *
+strip_nastyhtml(const char * in)
+{
+	char *returnstr, *sp;
+	char *lcstr, *lcp;
+	
+	sp = returnstr = xstrdup(in);
+	lcp = lcstr = xstrdup(in);	/* lower-cased search copy; offsets match returnstr */
+	
+	while (*lcp) {
+		*lcp = tolower((unsigned char) *lcp);	/* plain char may be negative */
+		lcp++;
+	}
+	while ((lcp = strstr(lcstr, "<body")) != NULL) {   /* becomes <---- */
+		sp = returnstr + (lcp - lcstr) ;
+		sp++; *sp++ = '-'; *sp++ = '-'; *sp++ = '-'; *sp++ = '-'; 
+		*lcp = '*';         /* so we wont find it again */
+	}
+	while ((lcp = strstr(lcstr, "</body")) != NULL) {
+		sp = returnstr + (lcp - lcstr) ; /* becomes </---- */
+		sp++; sp++; *sp++ = '-'; *sp++ = '-'; *sp++ = '-'; *sp++ = '-'; 
+		*lcp = '*';         /* so we wont find it again */
+	}
+	while ((lcp = strstr(lcstr, "</html")) != NULL) {
+		sp = returnstr + (lcp - lcstr) ; /* becomes </---- */
+		sp++; sp++; *sp++ = '-'; *sp++ = '-'; *sp++ = '-'; *sp++ = '-'; 
+		*lcp = '*';         /* so we wont find it again */
+	}
+	while ((lcp = strstr(lcstr, "<style")) != NULL) {
+		sp = returnstr + (lcp - lcstr) ; /* becomes <!--   ; never overwrite the NUL */
+		sp++; *sp++ = '!'; *sp++ = '-'; *sp++ = '-';  *sp++ = ' '; *sp++ = ' '; if (*sp) *sp = ' ';
+		*lcp = '*';         /* so we wont find it again */
+	}
+	while ((lcp = strstr(lcstr, "</style>")) != NULL) {
+		sp = returnstr + (lcp - lcstr) ; /* becomes    --> */
+		*sp++ = ' '; *sp++ = ' '; *sp++ = ' '; *sp++ = ' '; *sp++ = ' '; *sp++ = '-'; *sp++ = '-'; 
+		*lcp = '*';         /* so we wont find it again */
+	}
+	while ((lcp = strstr(lcstr, "<image")) != NULL) {
+		sp = returnstr + (lcp - lcstr) ; /* becomes <img */
+		sp+=3; *sp++ = 'g'; *sp++ = ' '; *sp++ = ' ';
+		*lcp = '*';         /* so we wont find it again */
+	}
+	xfree (lcstr);
+	return (returnstr);
+}
+	
 /*
  *  Without getting into all the complexity of technically legal HTML,
  *  this function tries to strip "ugly" parts of it to make it more 
@@ -794,54 +849,96 @@ char *
 strip_html(const utf_string *in)
 {
 	char *outstring, *out;
-	int ctr;
 	char *instr = in->utfstring;
-
+	char tag[8];
+	short int taglen = 0;
+	/* The result is always a heap copy, so callers may xfree() it. */
 	if (!in->is_html)
-		return in->utfstring;
+		return xstrdup(in->utfstring);
 	/*
 	 * We only shorten, so just dupe the input buf for space.
 	 */
-	out = outstring = xstrdup(in->utfstring);
-	outstring[0] = 0;
-
-	for(ctr=0; ; instr++) {
-		switch(*instr) {
-			case 0: 
-				return (out);
-
-			case '<':
-				if (instr[1] == 'p')
-					*outstring++ = '\n';
-				ctr++;
-				break;
-			case '>':
-				ctr--;
-				break;
-			case '\n':
-				continue;
-			default:
-				if (ctr == 0) {
-					*outstring++ = *instr;
-				}
+
+	outstring = out = xstrdup(in->utfstring);
+
+	tag[0] = 0;
+	while (*instr) {
+		if ((*instr == '<') || (*instr == '&')) {
+			tag[0] = *instr;
+			taglen = 0;
+		}
+		/* Outside markup: copy text, dropping raw newlines. */
+		if (! tag[0]) {
+			if (*instr != '\n')
+				*out++ = *instr;
 		}
+		else {
+			if (taglen < (sizeof(tag)-1)) {
+				tag[taglen++] = tolower((unsigned char) *instr);
+				tag[taglen] = 0;
+			}
+		}
+		/* End of a tag or entity: emit its plain-text stand-in. */
+		if ( ((tag[0] == '<') && (*instr == '>')) ||
+		     ((tag[0] == '&') && (*instr == ';')) ) {
+			if (! strcmp(tag,"&amp;"))
+				*out++ = '&';
+			else if (! strcmp (tag, "&lt;"))
+				*out++ = '<';
+			else if (! strcmp (tag, "&gt;"))
+				*out++ = '>';
+			else if (! strcmp (tag, "&quot;"))
+				*out++ = '"';
+			else if (! strcmp (tag, "&nbsp;"))
+				*out++ = ' ';
+			else if (! strcmp (tag, "&deg;")) {
+				*out++ = 'd'; *out++ = 'e'; *out++ = 'g';
+			}
+			else if ((tag[0]=='<') && (tag[1]=='p'))
+				*out++ = '\n';
+			else if ((tag[0]=='<') && (tag[1]=='b') && (tag[2]=='r'))
+				*out++ = '\n';
+			else if ((tag[0]=='<') && (tag[1]=='/') && (tag[2]=='t') && (tag[3]=='r'))
+				*out++ = '\n';
+			else if ((tag[0]=='<') && (tag[1]=='/') && (tag[2]=='t') && (tag[3]=='d'))
+				*out++ = ' ';
+			else if ((tag[0]=='<') && (tag[1]=='i') && (tag[2]=='m') && (tag[3]=='g')) {
+				*out++ = '['; *out++ = 'I'; *out++ = 'M'; *out++ = 'G'; *out++ = ']';
+			}
+			/* Any other tag or entity is dropped. */
+		      tag[0] = 0;
+		}
+		instr++;
 	}
+	*out = 0;
+	return (outstring);
 }
 
-char * xml_entitize(const char * str) 
+typedef struct {
+	const char * text;	/* literal text to look for */
+	const char * entity;	/* entity written in its place */
+	int  not_html;		/* nonzero: skipped when entitize() runs with is_html set */
+} entity_types;
+
+static 
+entity_types stdentities[] =  {
+	{ "&",	"&amp;", 0 },	/* first, so the '&' of later replacements is not re-escaped */
+	{ "'", 	"&apos;", 1 },
+	{ "<",	"&lt;", 0 },
+	{ ">",	"&gt;", 0 },
+	{ "\"",	"&quot;", 0 },
+	{ NULL,	NULL, 0 }	/* terminator: the loops stop at text == NULL */
+};
+
+static 
+char * 
+entitize(const char * str, int is_html) 
 {
 	int elen, ecount, nsecount;
-	const char ** ep;
+	entity_types *ep;
 	const char * cp;
 	char * p, * tmp, * xstr;
-	const char * stdentities[] = {
-	"&",	"&amp;",
-	"<",	"&lt;",
-	">",	"&gt;",
-	"'", 	"&apos;",
-	"\"",	"&quot;",
-	NULL,	NULL 
-	};
+
 	char tmpsub[20];
 	int bytes = 0;
 	int value = 0;
@@ -849,14 +946,14 @@ char * xml_entitize(const char * str)
 	elen = ecount = nsecount = 0;
 
 	/* figure # of entity replacements and additional size. */
-	while (*ep) {
+	while (ep->text) {
 		cp = str;
-		while ((cp = strstr(cp, *ep)) != NULL) {
-			elen += strlen(*(ep + 1)) - strlen(*ep);
+		while ((cp = strstr(cp, ep->text)) != NULL) {
+			elen += strlen(ep->entity) - strlen(ep->text);
 			ecount++;
-			cp += strlen(*ep);
+			cp += strlen(ep->text);
 		}
-		ep += 2;
+		ep++;
 	}
 	
 	/* figure the same for other than standard entities (i.e. anything
@@ -880,23 +977,23 @@ char * xml_entitize(const char * str)
 		return (tmp);
 
         if ( ecount != 0 ) {	
-		ep = stdentities;
-
-		while (*ep) {
+		for (ep = stdentities; ep->text; ep++) {
 			p = tmp;
-			while ((p = strstr(p, *ep)) != NULL) {
-				elen = strlen(*(ep + 1));
+			if (is_html && ep->not_html)  {
+				continue;
+			}
+			while ((p = strstr(p, ep->text)) != NULL) {
+				elen = strlen(ep->entity);
 
-				xstr = xstrdup(p + strlen(*ep));
+				xstr = xstrdup(p + strlen(ep->text));
 
-				strcpy(p, *(ep + 1));
+				strcpy(p, ep->entity);
 				strcpy(p + elen, xstr);
 
 				xfree(xstr);
 
 				p += elen;
 			}  
-			ep += 2;
 		}
 	}
 
@@ -925,3 +1022,17 @@ char * xml_entitize(const char * str)
 	}	
 	return (tmp);
 }
+
+/*
+ * Public callers for the above.  HTML has no &apos; entity, so the two
+ * entry points differ only in the is_html flag passed to entitize().
+ */
+char * xml_entitize(const char * str) 
+{
+	return entitize(str, 0);	/* is_html = 0: every entry in the table is used */
+}
+
+char * html_entitize(const char * str) 
+{
+	return entitize(str, 1);	/* is_html = 1: entries flagged not_html are skipped */
+}
diff --git a/gpsbabel/vecs.c b/gpsbabel/vecs.c
index 46a1fa714..c205bf5eb 100644
--- a/gpsbabel/vecs.c
+++ b/gpsbabel/vecs.c
@@ -60,6 +60,8 @@ extern ff_vecs_t geoniche_vecs;
 extern ff_vecs_t gpl_vecs;
 extern ff_vecs_t ozi_vecs;
 extern ff_vecs_t nmea_vecs;
+extern ff_vecs_t text_vecs;
+extern ff_vecs_t html_vecs;
 
 static
 vecs_t vec_list[] = {
@@ -244,6 +246,18 @@ vecs_t vec_list[] = {
 		"NMEA 0183 sentences",
 		NULL
 	},
+	{
+		&text_vecs,
+		"text",
+		"Textual Output",
+		NULL
+	},
+	{
+		&html_vecs,
+		"html",
+		"HTML Output",
+		NULL
+	},
 	{
 		NULL,
 		NULL,
-- 
2.30.2